{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "apply_demo.csv     demo_duplicate.csv \u001b[31mmovie_metadata.csv\u001b[m\u001b[m\r\n",
      "city_weather.csv   iris.csv\r\n"
     ]
    }
   ],
   "source": [
    "!ls ../homework"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('../homework/city_weather.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>city</th>\n",
       "      <th>temperature</th>\n",
       "      <th>wind</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>03/01/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>17/01/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>31/01/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>14/02/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>-3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28/02/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>13/03/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>27/03/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>-4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10/04/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>19</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>24/04/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>08/05/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>22/05/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>05/06/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>-10</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>19/06/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>03/07/2016</td>\n",
       "      <td>SH</td>\n",
       "      <td>-9</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>17/07/2016</td>\n",
       "      <td>GZ</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>31/07/2016</td>\n",
       "      <td>GZ</td>\n",
       "      <td>-1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>14/08/2016</td>\n",
       "      <td>GZ</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>28/08/2016</td>\n",
       "      <td>GZ</td>\n",
       "      <td>25</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>11/09/2016</td>\n",
       "      <td>SZ</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>25/09/2016</td>\n",
       "      <td>SZ</td>\n",
       "      <td>-10</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          date city  temperature  wind\n",
       "0   03/01/2016   BJ            8     5\n",
       "1   17/01/2016   BJ           12     2\n",
       "2   31/01/2016   BJ           19     2\n",
       "3   14/02/2016   BJ           -3     3\n",
       "4   28/02/2016   BJ           19     2\n",
       "5   13/03/2016   BJ            5     3\n",
       "6   27/03/2016   SH           -4     4\n",
       "7   10/04/2016   SH           19     3\n",
       "8   24/04/2016   SH           20     3\n",
       "9   08/05/2016   SH           17     3\n",
       "10  22/05/2016   SH            4     2\n",
       "11  05/06/2016   SH          -10     4\n",
       "12  19/06/2016   SH            0     5\n",
       "13  03/07/2016   SH           -9     5\n",
       "14  17/07/2016   GZ           10     2\n",
       "15  31/07/2016   GZ           -1     5\n",
       "16  14/08/2016   GZ            1     5\n",
       "17  28/08/2016   GZ           25     4\n",
       "18  11/09/2016   SZ           20     1\n",
       "19  25/09/2016   SZ          -10     4"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = df.groupby(df['city'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.DataFrameGroupBy object at 0x10d45a128>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'BJ': Int64Index([0, 1, 2, 3, 4, 5], dtype='int64'),\n",
       " 'GZ': Int64Index([14, 15, 16, 17], dtype='int64'),\n",
       " 'SH': Int64Index([6, 7, 8, 9, 10, 11, 12, 13], dtype='int64'),\n",
       " 'SZ': Int64Index([18, 19], dtype='int64')}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g.groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_bj = g.get_group('BJ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(df_bj.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>temperature</th>\n",
       "      <th>wind</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>city</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>BJ</th>\n",
       "      <td>10.000</td>\n",
       "      <td>2.833333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GZ</th>\n",
       "      <td>8.750</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SH</th>\n",
       "      <td>4.625</td>\n",
       "      <td>3.625000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SZ</th>\n",
       "      <td>5.000</td>\n",
       "      <td>2.500000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      temperature      wind\n",
       "city                       \n",
       "BJ         10.000  2.833333\n",
       "GZ          8.750  4.000000\n",
       "SH          4.625  3.625000\n",
       "SZ          5.000  2.500000"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.DataFrameGroupBy object at 0x10d45a128>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>city</th>\n",
       "      <th>temperature</th>\n",
       "      <th>wind</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>03/01/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>17/01/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>31/01/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>14/02/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>-3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28/02/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>13/03/2016</td>\n",
       "      <td>BJ</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date city  temperature  wind\n",
       "0  03/01/2016   BJ            8     5\n",
       "1  17/01/2016   BJ           12     2\n",
       "2  31/01/2016   BJ           19     2\n",
       "3  14/02/2016   BJ           -3     3\n",
       "4  28/02/2016   BJ           19     2\n",
       "5  13/03/2016   BJ            5     3"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dict(list(g))['BJ']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('BJ',          date city  temperature  wind\n",
       "  0  03/01/2016   BJ            8     5\n",
       "  1  17/01/2016   BJ           12     2\n",
       "  2  31/01/2016   BJ           19     2\n",
       "  3  14/02/2016   BJ           -3     3\n",
       "  4  28/02/2016   BJ           19     2\n",
       "  5  13/03/2016   BJ            5     3),\n",
       " ('GZ',           date city  temperature  wind\n",
       "  14  17/07/2016   GZ           10     2\n",
       "  15  31/07/2016   GZ           -1     5\n",
       "  16  14/08/2016   GZ            1     5\n",
       "  17  28/08/2016   GZ           25     4),\n",
       " ('SH',           date city  temperature  wind\n",
       "  6   27/03/2016   SH           -4     4\n",
       "  7   10/04/2016   SH           19     3\n",
       "  8   24/04/2016   SH           20     3\n",
       "  9   08/05/2016   SH           17     3\n",
       "  10  22/05/2016   SH            4     2\n",
       "  11  05/06/2016   SH          -10     4\n",
       "  12  19/06/2016   SH            0     5\n",
       "  13  03/07/2016   SH           -9     5),\n",
       " ('SZ',           date city  temperature  wind\n",
       "  18  11/09/2016   SZ           20     1\n",
       "  19  25/09/2016   SZ          -10     4)]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BJ\n",
      "         date city  temperature  wind\n",
      "0  03/01/2016   BJ            8     5\n",
      "1  17/01/2016   BJ           12     2\n",
      "2  31/01/2016   BJ           19     2\n",
      "3  14/02/2016   BJ           -3     3\n",
      "4  28/02/2016   BJ           19     2\n",
      "5  13/03/2016   BJ            5     3\n",
      "GZ\n",
      "          date city  temperature  wind\n",
      "14  17/07/2016   GZ           10     2\n",
      "15  31/07/2016   GZ           -1     5\n",
      "16  14/08/2016   GZ            1     5\n",
      "17  28/08/2016   GZ           25     4\n",
      "SH\n",
      "          date city  temperature  wind\n",
      "6   27/03/2016   SH           -4     4\n",
      "7   10/04/2016   SH           19     3\n",
      "8   24/04/2016   SH           20     3\n",
      "9   08/05/2016   SH           17     3\n",
      "10  22/05/2016   SH            4     2\n",
      "11  05/06/2016   SH          -10     4\n",
      "12  19/06/2016   SH            0     5\n",
      "13  03/07/2016   SH           -9     5\n",
      "SZ\n",
      "          date city  temperature  wind\n",
      "18  11/09/2016   SZ           20     1\n",
      "19  25/09/2016   SZ          -10     4\n"
     ]
    }
   ],
   "source": [
    "for name, group_df in g:\n",
    "    print(name)\n",
    "    print(group_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "select * from table_1 group by column_1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
